In [2]:
from __future__ import print_function  # must come before any other imports in the cell
import cv2
import numpy as np
import matplotlib.pyplot as plt
from ipywidgets import interact, interactive, fixed
import ipywidgets as widgets
import cvutil as cvfinn
%matplotlib inline

Image manipulation


In [16]:
# read in an image and display it
img = cv2.imread("../images/cow3.jpg") 
rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
plt.imshow(rgb)


Out[16]:
<matplotlib.image.AxesImage at 0x7f88900c6350>

What is the type of img? Hint: use the type() function.


In [17]:
# put your code here
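One possible answer (a sketch, assuming img was loaded successfully above):

# img is a numpy array of shape (rows, cols, channels) with dtype uint8
print(type(img))
print(img.shape, img.dtype)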

You can select parts of an image using numpy slicing.

Try to get a small image showing just the cow's front leg


In [18]:
roi = rgb[55:110,50:100,:] #[ymin:ymax,xmin:xmax,channels]
plt.imshow(roi)


Out[18]:
<matplotlib.image.AxesImage at 0x7f8890690610>

Color spaces and tracking objects

The photos below contain the same set of objects, photographed by different cameras and under different lighting conditions. Under each photo we plot the RGB and HSV histograms.


In [19]:
import os
image_names = sorted([os.path.join("../images/lighting",f) for f in os.listdir("../images/lighting")])
images = [cv2.imread(img) for img in image_names]
hsv_images = [cv2.cvtColor(img,cv2.COLOR_BGR2HSV) for img in images]
rgb_images = [cv2.cvtColor(img,cv2.COLOR_BGR2RGB) for img in images]
fig,ax = plt.subplots(3,len(images),figsize=(20,10))
for indx,(img,hsv,rgb) in enumerate(zip(images,hsv_images,rgb_images)):
    ax[0,indx].imshow(rgb)
    cvfinn.plot_histogram(ax[1,indx],img,["blue","green","red"],"BGR")
    cvfinn.plot_histogram(ax[2,indx],hsv,["hue","saturation","value"],"HSV",legend_loc = "upper right")


Looking at these histograms, how might we separate the blocks from the background? Will it be easier in RGB or HSV space?


In [20]:
lower = np.array([0,100,5])
upper = np.array([180,256,256])
fig,ax = plt.subplots(2,len(images),figsize=(20,8))

for indx,(hsv,rgb) in enumerate(zip(hsv_images,rgb_images)):
    mask = cv2.inRange(hsv,lower,upper) # returns 255 if the pixel is in the specified range for all channels, 0 otherwise
    bmask = (mask > 0)
    pixels = hsv[:,:,0][mask > 0]
    res = cv2.bitwise_and(rgb,rgb, mask=mask)
    hsv_res = cv2.cvtColor(res,cv2.COLOR_RGB2HSV)
    ax[0,indx].imshow(res)
    cvfinn.plot_hsv_hist(ax[1,indx],pixels)


Try adjusting the range parameters to isolate each color of block as cleanly as possible.
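One way to explore the ranges is interactively, using the interact helper imported at the top of the notebook. This is just a sketch; the initial slider values simply repeat the range used above.

def show_mask(h_lo=0, h_hi=180, s_lo=100, s_hi=256, v_lo=5, v_hi=256):
    # build the lower/upper bounds from the slider values and show the masked image
    lower = np.array([h_lo, s_lo, v_lo])
    upper = np.array([h_hi, s_hi, v_hi])
    mask = cv2.inRange(hsv_images[0], lower, upper)
    plt.imshow(cv2.bitwise_and(rgb_images[0], rgb_images[0], mask=mask))

interact(show_mask, h_lo=(0,180), h_hi=(0,180), s_lo=(0,256), s_hi=(0,256), v_lo=(0,256), v_hi=(0,256))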

Extracting colored objects


In [21]:
hsv = hsv_images[0]
lower = np.array([170,90,0])
upper = np.array([0,256,256])
img = cvfinn.hsvInRange(hsv,lower,upper)
plt.imshow(img,cmap="gray")


Out[21]:
<matplotlib.image.AxesImage at 0x7f8888e4a550>

Thresholding

Thresholding is the process of converting a grayscale image to a binary image by comparing each pixel against a cut-off value.
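Conceptually, global thresholding with THRESH_BINARY is equivalent to the numpy one-liner below (a sketch; gray stands for any single-channel uint8 image):

# every pixel above the threshold becomes 255 (white), everything else becomes 0 (black)
binary = np.where(gray > 127, 255, 0).astype(np.uint8)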

Code for these examples adapted from http://docs.opencv.org/3.2.0/d7/d4d/tutorial_py_thresholding.html


In [22]:
# Global vs local thresholding
img = cv2.imread('../images/sudoku.jpg',0)
img = cv2.medianBlur(img,5)
ret,th1 = cv2.threshold(img,127,255,cv2.THRESH_BINARY)
th2 = cv2.adaptiveThreshold(img,255,cv2.ADAPTIVE_THRESH_MEAN_C,cv2.THRESH_BINARY,11,2)
th3 = cv2.adaptiveThreshold(img,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY,11,2)
titles = ['Original Image', 'Global Thresholding',
          'Adaptive Mean Thresholding', 'Adaptive Gaussian Thresholding']
threshold_images = [img, th1, th2, th3]
fig, ax = plt.subplots(1,len(threshold_images),figsize=(20,5))
for indx,img in enumerate(threshold_images):
    ax[indx].imshow(img,cmap="gray")
    ax[indx].set_title(titles[indx])



In [23]:
# Otsu's thresholding 
img = hsv_images[0][:,:,1] #extract just the saturation channel from our block picture.
img = cv2.GaussianBlur(img,(5,5),0)
ret3,th3 = cv2.threshold(img,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
fig,ax = plt.subplots(1,2,figsize=(10,5))
ax[0].imshow(img,cmap="gray")
ax[1].imshow(th3,cmap="gray")


Out[23]:
<matplotlib.image.AxesImage at 0x7f8888b0c4d0>

Compare what happens with and without the GaussianBlur (and with different levels of blurring).
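A sketch of one way to make that comparison (the kernel sizes here are arbitrary choices to experiment with):

sat = hsv_images[0][:,:,1]   # saturation channel of the first block picture
kernels = [0, 3, 9, 21]      # 0 means no blur; kernel sizes must be odd
fig, ax = plt.subplots(1, len(kernels), figsize=(20, 5))
for i, k in enumerate(kernels):
    blurred = sat if k == 0 else cv2.GaussianBlur(sat, (k, k), 0)
    _, th = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    ax[i].imshow(th, cmap="gray")
    ax[i].set_title("no blur" if k == 0 else "%dx%d blur" % (k, k))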

Contours


In [24]:
fig,ax = plt.subplots(1,4,figsize=(20,6))
im = cv2.bitwise_not(cv2.cvtColor(images[0],cv2.COLOR_BGR2GRAY))
im1 = cv2.GaussianBlur(im, (13, 13), 0)
ret,im2 = cv2.threshold(im1,100,255,cv2.THRESH_BINARY)

contour_im = im2
res,contours,hierarchy = cv2.findContours(contour_im.copy(), cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) #findContours modifies the image
rgb = cv2.cvtColor(contour_im,cv2.COLOR_GRAY2RGB)
cv2.drawContours(rgb, contours, -1, (0,255,0), 3)

ax[0].imshow(im,cmap="gray")
ax[1].imshow(im1,cmap="gray")
ax[2].imshow(im2,cmap="gray")
ax[3].imshow(rgb)


Out[24]:
<matplotlib.image.AxesImage at 0x7f88891e1d10>

Histogram back projection

  • Obtain a representative sample of the region of interest
  • Create a histogram of the colors in that sample
  • Normalize the histogram so that the counts sum to 1. Now we can interpret each bin as the probability of seeing that color value in the region of interest.
  • For every pixel in an image, look up the probability that that pixel's color belongs to the region of interest.

In [25]:
import os
path = "../images"
images = ["cow1.jpg","cow2.jpg","cow3.jpg","cow4.jpg"]
images = [cv2.imread(os.path.join(path,image),cv2.IMREAD_COLOR) for image in images]
rgb = [cv2.cvtColor(image,cv2.COLOR_BGR2RGB) for image in images]
hsv_images = [cv2.cvtColor(image,cv2.COLOR_BGR2HSV) for image in images]
rois = [(0,0,250,30),(0,150,250,180),(0,150,250,180),(0,150,250,180)]

# plot all the images with the regions we selected as containing grass (note they are all the same shape for convenient stacking)
fig,ax = plt.subplots(2,len(images),figsize = (20,5))

# make an hsv image containing all the grass regions we have selected.
grass = np.vstack([image[ymin:ymax,xmin:xmax] for ((xmin,ymin,xmax,ymax),image) in zip(rois,hsv_images)])
hist = cv2.calcHist([grass],[0,1],mask = None, histSize = [8,8], ranges = [0, 180, 0, 256])
cv2.normalize(hist,hist,0,1,cv2.NORM_MINMAX)

for indx,image in enumerate(rgb):
    hsv = hsv_images[indx]
    xmin,ymin,xmax,ymax = rois[indx]
    cv2.rectangle(image,(xmin,ymin),(xmax,ymax),(255,0,0),1)
    ax[0,indx].imshow(image)
    dst = cv2.calcBackProject([hsv],[0,1],hist,[0, 180, 0, 256],scale=255) 
    ax[1,indx].imshow(dst,cmap="gray")
    
fig,ax = plt.subplots()
ax.imshow(hist,interpolation="nearest",cmap="gray")


Out[25]:
<matplotlib.image.AxesImage at 0x7f8889783c50>

Experiment with applying thresholds on the backprojected images to extract the grass region. Some pixels that are part of the grass are marked as having very low probability of being grass (they show up dark in the images above). How might we improve finding continuous regions of grass? One possibility is sketched below.
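A rough sketch of one approach: threshold the back-projected probabilities, then close small holes with a morphological operation. The threshold of 50 and the kernel size are guesses to experiment with, and dst here is simply the last backprojection computed in the loop above.

# keep only pixels with a reasonably high probability of being grass
_, grass_mask = cv2.threshold(dst, 50, 255, cv2.THRESH_BINARY)
# morphological closing fills small dark holes inside the grass region
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (11, 11))
closed = cv2.morphologyEx(grass_mask, cv2.MORPH_CLOSE, kernel)
fig, ax = plt.subplots(1, 2, figsize=(10, 5))
ax[0].imshow(grass_mask, cmap="gray")
ax[1].imshow(closed, cmap="gray")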

Edges

Edge detection algorithms find regions in the image where the intensity changes quickly (high intensity gradient).

Canny edge detection takes two parameters:

  • maxval - pixels with an intensity gradient above this value are considered definitely part of an edge
  • minval - pixels with an intensity gradient below this value are considered not part of an edge

Pixels with gradients between these values are considered edge pixels if they are connected to "definitely edge" pixels.

For more details http://docs.opencv.org/3.2.0/da/d22/tutorial_py_canny.html


In [26]:
grey = cv2.cvtColor(rgb_images[0],cv2.COLOR_RGB2GRAY)[200:310,230:370]

def find_edges(minval,maxval):
    edges = cv2.Canny(grey,minval,maxval)
    fig,ax = plt.subplots(1,2,figsize=(20,10))
    ax[0].imshow(grey,cmap = 'gray')
    ax[1].imshow(edges,cmap = 'gray')
    
find_edges(90,125)
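
Since ipywidgets is already imported at the top, you could also tune the two thresholds interactively. A quick sketch (the slider bounds are arbitrary):

# interact builds sliders for minval and maxval and re-runs find_edges as they move
interact(find_edges, minval=(0, 255), maxval=(0, 255))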


Template matching

Template matching searches for a small image (the template) inside a larger image by sliding the template over the image and comparing pixel values. The template needs to be very similar to the matching region on a pixel-wise basis; the method is not invariant to scale or rotation.

For more details see

http://docs.opencv.org/3.2.0/d4/dc6/tutorial_py_template_matching.html
http://docs.opencv.org/3.2.0/df/dfb/group__imgproc__object.html


In [27]:
img = cv2.imread("../images/windows.jpg") #[ymin:ymax,xmin:xmax,channels]
rgb_orig = cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
rgb = rgb_orig.copy()
template = rgb[16:41,54:82,:].copy()
h,w,_ = template.shape # shape is (rows, cols, channels), so h is the template height and w its width

method = cv2.TM_CCOEFF #'cv2.TM_CCOEFF', 'cv2.TM_CCOEFF_NORMED', 'cv2.TM_CCORR', 'cv2.TM_CCORR_NORMED', 'cv2.TM_SQDIFF', 'cv2.TM_SQDIFF_NORMED'
            
res = cv2.matchTemplate(rgb,template,method)
res = cv2.normalize(res, None, 0, 255, norm_type=cv2.NORM_MINMAX)

# draw a blue rectangle around all the places the template matches (above some threshold)
threshold = 180
matches = np.where(res > threshold)
for top_left in zip(matches[1],matches[0]):
    bottom_right = (top_left[0] + w, top_left[1] + h)
    cv2.rectangle(rgb,top_left,bottom_right,(0,0,255),2)

# draw a red rectangle around the single best match for the template in the image    
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)   
top_left = max_loc
bottom_right = (top_left[0] + w, top_left[1] + h)
cv2.rectangle(rgb,top_left, bottom_right, 255, 2)

fig,ax = plt.subplots(1,4,figsize=(20,5))
ax[0].imshow(rgb_orig)
ax[1].imshow(template)
ax[2].imshow(res,cmap="gray")
ax[3].imshow(rgb)


Out[27]:
<matplotlib.image.AxesImage at 0x7f88893cd650>

Play around with the different methods for template matching. If you look closely at the picture on the right, you will see many of the windows have been found multiple times. Can you think of a way to fix this?
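One option is non-maximum suppression over the thresholded match locations, for instance with the same imutils helper used in the pedestrian-detection example below. This is a sketch; the overlap threshold of 0.3 is a guess.

from imutils.object_detection import non_max_suppression

# build (x1, y1, x2, y2) boxes from the thresholded match locations
boxes = np.array([[x, y, x + w, y + h] for (x, y) in zip(matches[1], matches[0])])
picked = non_max_suppression(boxes, probs=None, overlapThresh=0.3)

# draw only the surviving boxes on a fresh copy of the image
rgb_nms = rgb_orig.copy()
for (x1, y1, x2, y2) in picked:
    cv2.rectangle(rgb_nms, (x1, y1), (x2, y2), (0, 0, 255), 2)
plt.imshow(rgb_nms)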

Feature matching


In [28]:
img1 = cv2.cvtColor(cv2.imread("../images/noodles1.jpg"),cv2.COLOR_BGR2RGB)
img2 = cv2.cvtColor(cv2.imread("../images/noodles2.jpg"),cv2.COLOR_BGR2RGB)

orb = cv2.ORB_create()
kp1, des1 = orb.detectAndCompute(img1,None)
kp2,des2 = orb.detectAndCompute(img2,None)

img1_with_keypoints = cv2.drawKeypoints(img1, kp1, None, color=(0,255,0), flags=0)
fig,ax = plt.subplots(1,2,figsize=(20,5))
ax[0].imshow(img1_with_keypoints), ax[1].imshow(img2)

# create BFMatcher object
bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
# Match descriptors.
matches = bf.match(des1,des2)
# Sort them in the order of their distance.
matches = sorted(matches, key = lambda x:x.distance)
# Draw first 50 matches.
img3 = cv2.drawMatches(img1,kp1,img2,kp2,matches[:50],None,flags=2)
plt.imshow(img3),plt.show()


Out[28]:
(<matplotlib.image.AxesImage at 0x7f88890c7890>, None)

Machine Learning

Pedestrian detection with HOG

HOG stands for histogram of oriented gradients. It is an image feature built by making histograms of gradients (the directions in which the image changes most quickly) for each of many small subregions of the image. The features are then fed into a support vector machine (SVM) to classify whether the image contains the object of interest or not.

Reasonable HOG detectors can be trained with ~100 images. HOG is not invariant to rotation. If you need to recognise things with variable orientation, the standard work-around is to train multiple separate classifiers, one for each rotation (e.g. 0°, 45°, 90°, etc.).

Here is a nice explanation of how HOG features work http://www.learnopencv.com/histogram-of-oriented-gradients/

and how to combine them with an SVM to make a classifier http://www.learnopencv.com/handwritten-digits-classification-an-opencv-c-python-tutorial/

And some nice visualizations of how an algorithm using HOG sees the world. http://web.mit.edu/vondrick/ihog/


In [3]:
# code adapted from http://www.pyimagesearch.com/2015/11/09/pedestrian-detection-opencv/
from imutils.object_detection import non_max_suppression


image = cv2.imread("../images/person.jpg")
orig = image.copy()

# create the HOG detector
hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
 
# detect people in the image
(rects, weights) = hog.detectMultiScale(image, winStride=(4, 4),padding=(8, 8), scale=1.05)
 
# draw all the bounding boxes on to orig
for (x, y, w, h) in rects:
    cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2)
 
# where boxes substantially overlap with one another, just select one. (Some overlap is allowed as pedestrians can be near each other)
rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)
 
# draw the filtered bounding boxes onto image
for (xA, yA, xB, yB) in pick:
    cv2.rectangle(image, (xA, yA), (xB, yB), (0, 255, 0), 2)

# plot the results  
fig,ax = plt.subplots(1,2,figsize=(10,5))
ax[0].imshow(cv2.cvtColor(orig,cv2.COLOR_BGR2RGB))
ax[1].imshow(cv2.cvtColor(image,cv2.COLOR_BGR2RGB))


Out[3]:
<matplotlib.image.AxesImage at 0x7fd4d8e9f190>

Training your own HOG detector

I'm sure this is possible with OpenCV, but dlib (which also has a Python interface and is installed on your Pi) makes it really easy. You can mix functions from OpenCV, dlib, and other libraries like scikit-learn as you wish in your Python scripts.

Look in the dlib folder in the workshop directory. There you should find a number of example Python scripts, including one for face detection with HOG and instructions at the bottom for training your own HOG detector.
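A minimal sketch of what training looks like with dlib's simple object detector, assuming you have annotated your training images in an XML file (for example with dlib's imglab tool). The file names here are placeholders:

import dlib

options = dlib.simple_object_detector_training_options()
options.add_left_right_image_flips = True  # doubles the training data if the object is left-right symmetric
options.C = 5                              # SVM regularisation parameter, worth tuning

# training.xml (placeholder name) lists the training images and their bounding boxes
dlib.train_simple_object_detector("training.xml", "detector.svm", options)

# load the trained detector and run it on a new image
detector = dlib.simple_object_detector("detector.svm")
test_image = cv2.cvtColor(cv2.imread("../images/person.jpg"), cv2.COLOR_BGR2RGB)
boxes = detector(test_image)  # returns a list of dlib rectangles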